*10_summary_table.do
* Start from an empty dataset (-capture- swallows any error from -clear-).
cap clear

* External input locations. Neither global is referenced in the visible part
* of this script.
global indir1 "/disk/bulkw/mrashid/matching_project/master_crosswalks"
global censdata "/homes/data/census-ipums/v2019/dta/"

* Project directory tree; every folder below is derived from $root.
global root    = "/disk/bulkw/nencka/schooling_pandemic/2021_10_18_final/"
global input   "$root/Input"
global scripts "$root/Scripts"
global temp    "$root/Temp"
global output  "$root/Output"
global log     "$root/Log"
global figures "$root/Figures"
global tables "$root/Tables"


* Close any log left open by an earlier (possibly aborted) run; otherwise
* -log using- stops with "log file already open".
cap log close
log using      "$log/10_summary_table", replace text

* NOTE: -perm- makes plotplain the default graph scheme for future Stata
* sessions as well, not just this run.
set scheme plotplain, perm 


*****************************************************************************
*****************************************************************************
*****************************************************************************

*Work in the tables directory so the summary table is written there
cd "$tables/"


*Read in long-run analysis sample
*(path quoted and written with a single slash, matching the -append- below)

use "$temp/longrun_analysis_sample.dta", clear
gen ind_longrun=1

desc, fullnames


*Append on short-run analysis sample
*-force- lets the append proceed even if a variable is string in one file and
*numeric in the other.

append using "$temp/analysis_data_shortrun.dta", force 

*Sample indicator: 1 = long-run sample, 0 = short-run rows from the 1920
*census, -1 = short-run rows from the 1910 census. Appended rows with any
*other census_year keep a missing ind_longrun.
replace ind_longrun=0 if missing(ind_longrun) & census_year==1920
replace ind_longrun=-1 if missing(ind_longrun) & census_year==1910

tab ind_longrun, m

*Build the indicator variables used in the summary table, tabulating before
*and after each step as a check

*--- Sex: male = 1 when sex==1, 0 otherwise (including missing sex) ---
tabulate census_year sex, missing
generate male = (sex == 1) & !missing(sex)

*--- Race: white = 1 when race==100, 0 otherwise (including missing race) ---
tabulate census_year race, missing
generate white = (race == 100)

tabulate ind_longrun sex, missing
tabulate ind_longrun male, missing
tabulate sex male, missing
*Every long-run observation is coded male, whatever its sex value
replace male = 1 if ind_longrun == 1 | sex == 1
tabulate ind_longrun male, missing

*--- Age: for the long-run sample, recompute as 1940 - birthyr - 20 ---
local age_formula 1940-birthyr-20
tabulate age ind_longrun, missing
replace age = `age_formula' if ind_longrun == 1
replace age_at_census = `age_formula' if ind_longrun == 1

tabulate age ind_longrun, missing

*--- Age bins 1-6, assigned from either age_at_1920 or age ---
*Bins are applied in ascending order, so when the two age variables fall in
*different bins the higher bin wins.
local bin_lo 0 6 11 15 19 22
local bin_hi 5 10 14 18 21 25
forvalues bin = 1/6 {
	local lo : word `bin' of `bin_lo'
	local hi : word `bin' of `bin_hi'
	replace age_bin = `bin' if inrange(age_at_1920, `lo', `hi') | inrange(age, `lo', `hi')
}
tabulate age ind_longrun, missing
tabulate age age_bin, missing


*--- School attendance: code 2 in school/school_20 -> 1, code 1 -> 0 ---
*The 0 assignment runs second, so it wins if the two variables disagree.
tabulate school ind_longrun, missing
tabulate in_school ind_longrun, missing
replace in_school = 1 if school_20 == 2 | school == 2
replace in_school = 0 if school_20 == 1 | school == 1
tabulate in_school ind_longrun, missing
bysort ind_longrun: tabulate age in_school, missing

*--- One attendance variable per age bin, missing outside that bin ---
levelsof age_bin, local(bin_levels)

foreach lvl of local bin_levels {
	generate in_school`lvl' = in_school if age_bin == `lvl'

	tabulate age in_school`lvl', missing
}


* Variable labels. They contain LaTeX markup (\%, $\geq$, \\$) because the
* labels are written into a .tex table by -esttab- later in this script.
label variable age_at_census "Age (in Childhood Census)"
label variable male "Male (\%)"
label variable white "White (\%)"
label variable in_school "Attending School (\%)"
label variable incwage_40_orig "Wage Income (\\$)"
label variable incnonwg_40 "1(Non-Wage Income $\geq$ \\$50)"
label variable educ_years "Educational Attainment (years)"

* Transform annual_hours with exp(x)-1, the inverse of ln(x+1).
* NOTE(review): presumably the variable was stored as ln(hours+1) upstream --
* confirm against the script that builds the analysis sample.
replace annual_hours = exp(annual_hours)-1
sum annual_hours, d

label variable days_closed "Number of Days Closed"

label variable annual_hours "Hours Worked (Annual)"
label variable wkswork1_40 "Weeks Worked (Annually)"
* Closing quote restored; the original label string was left unterminated.
label variable hrswork1_40 "Hours Worked (Weekly)"
label variable ind_gte50_incwage "1(Wage Income $\geq$ \\$50)"

* Labels for the age-bin-specific attendance variables in_school1-in_school6.
label variable in_school1 "Attending School Age 0--5 (\%)"
label variable in_school2 "Attending School Age 6--10 (\%)"
label variable in_school3 "Attending School Age 11--14 (\%)"
label variable in_school4 "Attending School Age 15--18 (\%)"
label variable in_school5 "Attending School Age 19--21 (\%)"
label variable in_school6 "Attending School Age 22--25 (\%)"


* Variables summarized for all three samples.
local childhood_vars male white age_at_census in_school in_school1 in_school2 in_school3 in_school4 in_school5 in_school6 days_closed

* Additional variables summarized only for the long-run sample.
* NOTE(review): wkswork1 and hrswork1 appear to rely on variable abbreviation
* to reach wkswork1_40 / hrswork1_40 (the variables labelled earlier in this
* script) -- confirm no exact-name wkswork1/hrswork1 exists in the data.
local adult_vars educ_years incwage_40_orig incnonwg_40 annual_hours wkswork1 hrswork1

* One stored estimate per sample: a = ind_longrun==-1, b = ind_longrun==0,
* c = ind_longrun==1.
estpost sum `childhood_vars' if ind_longrun==-1
est store a
estpost sum `childhood_vars' if ind_longrun==0
est store b
estpost sum `childhood_vars' `adult_vars' if ind_longrun==1
est store c


* Write the LaTeX table to summarystats.tex in the current directory.
* refcat() inserts each heading above the named row, so the first heading is
* anchored on male (the first row); anchoring it on white left the male row
* above its own group heading.
esttab a b c using summarystats.tex, replace ///
mtitles("\textbf{\emph{Short-Run 1910 Sample}}" ///
	"\textbf{\emph{Short-Run 1920 Sample}}" ///
	"\textbf{\emph{Matched 1920--1940 Sample}}") ///
refcat(male "\textbf{\emph{Variables from 1910--1920}}" educ_years "\textbf{\emph{Variables from 1940}}") ///
collabels(\multicolumn{1}{c}{{Mean}} \multicolumn{1}{c}{{Obs}}) ///
cells("mean(fmt(2)) count(fmt(%20.10gc))") label nonumber f noobs alignment(S) booktabs

clear
log close

